How many variables were interrogated?

# Number of distinct variable names in the filtered meta-analysis table.
print(n_distinct(meta_combined_simple_filtered$name))
## [1] 8980
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'k' (override with `.groups` argument)
##      n
## 1 4421
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
##   gender     n
##   <chr>  <int>
## 1 f         79
## 2 m         57
## `summarise()` regrouping output by 'k', 'country' (override with `.groups` argument)

# quantiles of R2

# Median, 75th and 99th percentiles of mean_r2; missing values are ignored.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
quantile(
  meta_combined_simple_filtered$mean_r2,
  probs = c(0.5, 0.75, 0.99),
  na.rm = TRUE
)
##          50%          75%          99% 
## 0.0004814887 0.0012445868 0.0108382462
## `summarise()` regrouping output by 'name' (override with `.groups` argument)
##   50%   75%   80%   90%   95% 97.5%   99% 
##     1     2     3    13    22    28    29
##   50%   75%   80%   90%   95% 97.5%   99% 
##     1     3     5    14    25    28    29
## Joining, by = "key"

R^2 vs. -log10(pvalue), stratified by country number and gender

## Warning: Removed 33 rows containing missing values (geom_point).
## Warning: Removed 24 rows containing missing values (geom_point).

I^2 vs. -log10(pvalue), stratified by survey number and gender

## Warning: Removed 3 rows containing missing values (geom_point).

Summary Statistics Tables

Table 2

# Flag an association as identified when it passes both the p-value threshold
# and the mean R^2 threshold (p_thresh and R2_thresh are defined outside this
# excerpt).
identified <- meta_combined_simple_filtered %>%
  mutate(sig = (pvalue < p_thresh) & (mean_r2 >= R2_thresh))

# Apply the same rule to meta_country_simple_filtered, stored as sig_country.
meta_country_simple_filtered <- meta_country_simple_filtered %>%
  mutate(sig_country = (pvalue < p_thresh) & (mean_r2 >= R2_thresh))

# Count identified associations per gender. filter() keeps only rows where the
# logical column is TRUE, so no comparison against T/TRUE is needed.
identified %>%
  filter(sig) %>%
  group_by(gender) %>%
  tally()
## # A tibble: 2 x 2
##   gender     n
##   <chr>  <int>
## 1 f        373
## 2 m        344
# Count identified associations per gender and number-of-countries bin.
# sig stays in the grouping so it appears as a column in the printed result.
identified %>%
  filter(sig) %>%
  group_by(sig, gender, num_country_bin) %>%
  tally()
## # A tibble: 8 x 4
## # Groups:   sig, gender [2]
##   sig   gender num_country_bin     n
##   <lgl> <chr>  <fct>           <int>
## 1 TRUE  f      (0,1]             209
## 2 TRUE  f      (1,10]             39
## 3 TRUE  f      (10,20]            35
## 4 TRUE  f      (20,30]            90
## 5 TRUE  m      (0,1]             236
## 6 TRUE  m      (1,10]             38
## 7 TRUE  m      (10,20]            34
## 8 TRUE  m      (20,30]            36
# Quartiles of the absolute odds ratio, I2 and mean R^2 over all associations,
# broken down by number-of-countries bin and gender.
association_summary_table <- identified %>%
  # Compute the absolute odds ratio once instead of inside every quantile call.
  mutate(abs_or = exp(abs(beta))) %>%
  group_by(num_country_bin, gender) %>%
  summarize(
    n = n(),
    q_25_or = quantile(abs_or, probs = 0.25),
    q_50_or = quantile(abs_or, probs = 0.5),
    q_75_or = quantile(abs_or, probs = 0.75),
    q_25_I2 = quantile(I2, probs = 0.25),
    q_50_I2 = quantile(I2, probs = 0.5),
    q_75_I2 = quantile(I2, probs = 0.75),
    q_25_r2 = quantile(mean_r2, probs = 0.25),
    q_50_r2 = quantile(mean_r2, probs = 0.5),
    q_75_r2 = quantile(mean_r2, probs = 0.75),
    # Keep the result grouped by num_country_bin (the previous implicit
    # default) so gt() still renders one row group per bin.
    .groups = "drop_last"
  )
## `summarise()` regrouping output by 'num_country_bin' (override with `.groups` argument)
# Render the summary as a gt table: two decimals for every quantile column,
# then scientific notation for the R^2 quantiles.
association_summary_table %>%
  gt() %>%
  fmt_number(columns = contains('q_'), decimals = 2) %>%
  fmt_scientific(columns = ends_with("r2"))
gender n q_25_or q_50_or q_75_or q_25_I2 q_50_I2 q_75_I2 q_25_r2 q_50_r2 q_75_r2
(0,1]
f 4543 1.20 1.62 3.69 0.00 0.00 0.00 8.56 × 10−5 3.58 × 10−4 1.17 × 10−3
m 4089 1.24 1.78 12.67 0.00 0.00 0.00 8.39 × 10−5 3.29 × 10−4 1.12 × 10−3
(1,10]
f 1702 1.19 1.67 136.40 0.00 62.10 99.01 2.56 × 10−4 6.30 × 10−4 1.39 × 10−3
m 1488 1.24 2.30 526.30 0.00 64.85 99.14 2.42 × 10−4 5.91 × 10−4 1.26 × 10−3
(10,20]
f 467 1.15 3.93 110.95 49.80 98.59 99.39 4.10 × 10−4 7.67 × 10−4 1.22 × 10−3
m 368 1.17 3.44 189.43 47.49 98.38 99.41 4.09 × 10−4 7.70 × 10−4 1.57 × 10−3
(20,30]
f 539 1.17 1.65 9.88 56.04 97.57 99.35 4.77 × 10−4 8.42 × 10−4 1.42 × 10−3
m 343 1.26 3.68 56.47 67.70 99.14 99.48 4.71 × 10−4 7.10 × 10−4 1.22 × 10−3
# Quartiles of the absolute odds ratio, I2 and mean R^2 restricted to the
# identified associations, by number-of-countries bin and gender.
association_summary_table_sig <- identified %>%
  filter(sig) %>%
  # Compute the absolute odds ratio once instead of inside every quantile call.
  mutate(abs_or = exp(abs(beta))) %>%
  group_by(num_country_bin, gender) %>%
  summarize(
    n = n(),
    q_25_or = quantile(abs_or, probs = 0.25),
    q_50_or = quantile(abs_or, probs = 0.5),
    q_75_or = quantile(abs_or, probs = 0.75),
    q_25_I2 = quantile(I2, probs = 0.25),
    q_50_I2 = quantile(I2, probs = 0.5),
    q_75_I2 = quantile(I2, probs = 0.75),
    q_25_r2 = quantile(mean_r2, probs = 0.25),
    q_50_r2 = quantile(mean_r2, probs = 0.5),
    q_75_r2 = quantile(mean_r2, probs = 0.75),
    # Keep the result grouped by num_country_bin (the previous implicit
    # default) so gt() still renders one row group per bin.
    .groups = "drop_last"
  )
## `summarise()` regrouping output by 'num_country_bin' (override with `.groups` argument)
# Render the identified-only summary as a gt table: two decimals for every
# quantile column, then scientific notation for the R^2 quantiles.
association_summary_table_sig %>%
  gt() %>%
  fmt_number(columns = contains('q_'), decimals = 2) %>%
  fmt_scientific(columns = ends_with("r2"))
gender n q_25_or q_50_or q_75_or q_25_I2 q_50_I2 q_75_I2 q_25_r2 q_50_r2 q_75_r2
(0,1]
f 209 2.69 871,167.69 3,311,394.30 0.00 0.00 0.00 1.64 × 10−3 3.22 × 10−3 7.64 × 10−3
m 236 1,016,942.20 3,344,900.40 6,751,671.86 0.00 0.00 0.00 1.60 × 10−3 2.50 × 10−3 5.86 × 10−3
(1,10]
f 39 1.55 1.98 14,451.18 0.00 10.48 67.59 1.68 × 10−3 2.63 × 10−3 4.80 × 10−3
m 38 2.31 443,747.77 2,182,770.88 0.00 0.60 86.23 1.28 × 10−3 1.84 × 10−3 3.54 × 10−3
(10,20]
f 35 1.34 1.51 1.92 21.63 51.69 73.08 1.25 × 10−3 1.94 × 10−3 3.20 × 10−3
m 34 1.39 1.56 1.90 17.25 50.51 61.30 1.73 × 10−3 2.27 × 10−3 5.17 × 10−3
(20,30]
f 90 1.36 1.57 1.77 48.98 65.01 75.69 1.72 × 10−3 2.61 × 10−3 3.76 × 10−3
m 36 1.26 1.48 1.87 59.49 64.83 82.21 1.47 × 10−3 2.42 × 10−3 3.14 × 10−3
# Formatted gt table of quartiles (absolute odds ratio, I2, mean R^2) for the
# identified associations with num_country equal to 29, by gender.
association_summary_table_sig_29 <- identified %>%
  filter(sig, num_country == 29) %>%
  # Compute the absolute odds ratio once instead of inside every quantile call.
  mutate(abs_or = exp(abs(beta))) %>%
  group_by(gender) %>%
  summarize(
    n = n(),
    q_25_or = quantile(abs_or, probs = 0.25),
    q_50_or = quantile(abs_or, probs = 0.5),
    q_75_or = quantile(abs_or, probs = 0.75),
    q_25_I2 = quantile(I2, probs = 0.25),
    q_50_I2 = quantile(I2, probs = 0.5),
    q_75_I2 = quantile(I2, probs = 0.75),
    q_25_r2 = quantile(mean_r2, probs = 0.25),
    q_50_r2 = quantile(mean_r2, probs = 0.5),
    q_75_r2 = quantile(mean_r2, probs = 0.75),
    # Single grouping variable: the result is ungrouped, as before.
    .groups = "drop"
  ) %>%
  gt() %>%
  fmt_number(columns = contains("q_"), decimals = 2) %>%
  fmt_scientific(columns = ends_with("r2"))
## `summarise()` ungrouping output (override with `.groups` argument)
# Display the gt table stored in association_summary_table_sig_29.
association_summary_table_sig_29
gender n q_25_or q_50_or q_75_or q_25_I2 q_50_I2 q_75_I2 q_25_r2 q_50_r2 q_75_r2
f 31 1.42 1.57 1.82 57.99 69.43 75.01 2.03 × 10−3 2.93 × 10−3 4.02 × 10−3
m 16 1.20 1.39 1.62 59.49 60.60 75.39 1.72 × 10−3 2.51 × 10−3 6.01 × 10−3

Table S1

# Table S1: quartiles of the absolute odds ratio, I2 and mean R^2 for all
# associations, split by whether num_country exceeds 1 and by gender.
identified %>%
  # Compute the absolute odds ratio once instead of inside every quantile call.
  mutate(abs_or = exp(abs(beta))) %>%
  group_by(num_country > 1, gender) %>%
  summarize(
    n = n(),
    q_25_or = quantile(abs_or, probs = 0.25),
    q_50_or = quantile(abs_or, probs = 0.5),
    q_75_or = quantile(abs_or, probs = 0.75),
    q_25_I2 = quantile(I2, probs = 0.25),
    q_50_I2 = quantile(I2, probs = 0.5),
    q_75_I2 = quantile(I2, probs = 0.75),
    q_25_r2 = quantile(mean_r2, probs = 0.25),
    q_50_r2 = quantile(mean_r2, probs = 0.5),
    q_75_r2 = quantile(mean_r2, probs = 0.75),
    # Keep the result grouped by `num_country > 1` (the previous implicit
    # default) so gt() still renders the FALSE/TRUE row groups.
    .groups = "drop_last"
  ) %>%
  gt() %>%
  fmt_number(columns = contains("q_"), decimals = 2) %>%
  fmt_scientific(columns = ends_with("r2"))
## `summarise()` regrouping output by 'num_country > 1' (override with `.groups` argument)
gender n q_25_or q_50_or q_75_or q_25_I2 q_50_I2 q_75_I2 q_25_r2 q_50_r2 q_75_r2
FALSE
f 4543 1.20 1.62 3.69 0.00 0.00 0.00 8.56 × 10−5 3.58 × 10−4 1.17 × 10−3
m 4089 1.24 1.78 12.67 0.00 0.00 0.00 8.39 × 10−5 3.29 × 10−4 1.12 × 10−3
TRUE
f 2708 1.18 1.73 73.83 17.56 75.77 99.22 3.33 × 10−4 7.12 × 10−4 1.36 × 10−3
m 2199 1.24 2.60 263.09 17.77 81.94 99.32 3.01 × 10−4 6.49 × 10−4 1.30 × 10−3

ECDFs of effect sizes

# ECDF of mean R^2, colored by identification status and faceted by
# number-of-countries bin (rows) and gender (columns).
p_ecdf_r2 <- ggplot(identified, aes(mean_r2, color = sig)) +
  stat_ecdf() +
  # Zoom with coord_cartesian() rather than scale limits: scale limits discard
  # values outside [0, 0.03] before stat_ecdf() runs, so the curves would be
  # computed on truncated data and overstate the percentiles.
  coord_cartesian(xlim = c(0, 0.03)) +
  facet_grid(
    num_country_bin ~ gender,
    labeller = labeller(gender = c(f = "Female", m = "Male"))
  ) +
  theme(legend.position = "none") +
  ylab("Percentile") +
  xlab("R^2")


# ECDF of the absolute odds ratio on a log10 x axis, colored by identification
# status and faceted by number-of-countries bin (rows) and gender (columns).
p_ecdf_or <- ggplot(identified, aes(exp(abs(beta)), color=sig)) +
  stat_ecdf() +
  scale_x_log10() +
  facet_grid(
    num_country_bin ~ gender,
    labeller=labeller(gender = c(f = "Female", m = "Male"))
  ) +
  theme(legend.position = 'none') +
  ylab('Percentile') +
  xlab('Odds Ratio (absolute value)')

# ECDF of I2 for associations with num_country above 1; this panel carries the
# shared color legend at the bottom.
p_ecdf_i2 <- ggplot(filter(identified, num_country > 1), aes(I2, color=sig)) +
  stat_ecdf() +
  scale_colour_discrete(name  ="Identified?") +
  facet_grid(
    num_country_bin ~ gender,
    labeller=labeller(gender = c(f = "Female", m = "Male"))
  ) +
  theme(legend.position = 'bottom') +
  ylab('Percentile') +
  xlab('I^2')

# Stack the three ECDF panels in a single column with equal relative heights,
# labelled A to C.
plot_grid(
  p_ecdf_r2, p_ecdf_or, p_ecdf_i2,
  ncol= 1,
  labels = c('A', 'B', 'C'),
  rel_heights = c(2,2,2)
)
## Warning: Removed 16 rows containing non-finite values (stat_ecdf).

Top Findings

## `summarise()` regrouping output by 'name' (override with `.groups` argument)
## # A tibble: 4 x 3
## # Groups:   gender [2]
##   gender sig       n
##   <chr>  <lgl> <int>
## 1 f      FALSE  6878
## 2 f      TRUE    373
## 3 m      FALSE  5944
## 4 m      TRUE    344
## # A tibble: 4 x 3
## # Groups:   gender [2]
##   gender sig       n
##   <chr>  <lgl> <int>
## 1 f      FALSE   137
## 2 f      TRUE     31
## 3 m      FALSE   138
## 4 m      TRUE     16

Plot main association size figures

Top Associations for females across 11-19 countries

Top Associations for females across 20-29 countries

Top Associations for females across all 29 countries

# Display the p_29_f plot object (it is built outside this excerpt).
p_29_f

## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

Top Associations for males across 20-29 countries

Top Associations for males across 11-19 countries

Top Associations for males across all 29 countries

Output Tables

Table of Significant Findings in variables across all 29 countries: Males

# Interactive table of sig_29_males: five rows per page, per-column filters on
# top, horizontal scrolling enabled. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
datatable(
  sig_29_males,
  rownames = FALSE,
  filter = "top",
  options = list(pageLength = 5, scrollX = TRUE)
)

Table of Significant Findings in variables across all 29 countries: Females

# Interactive table of sig_29_females: five rows per page, per-column filters
# on top, horizontal scrolling enabled. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
datatable(
  sig_29_females,
  rownames = FALSE,
  filter = "top",
  options = list(pageLength = 5, scrollX = TRUE)
)

Betas of males vs. females

# Split the multi-country associations (num_country > 1) by gender.
females <- filter(identified, gender == 'f', num_country > 1)
males <- filter(identified, gender == 'm', num_country > 1)

# Pair female and male rows by variable name, then label each pair by the
# gender(s) in which the association was identified.
wide_f_m <- inner_join(females, males, by='name', suffix=c('_female', '_male')) %>%
  mutate(identified_in=case_when(
    sig_female & sig_male ~ "Male+Female",
    sig_female ~ "Female",
    sig_male ~ "Male",
    TRUE ~ 'Neither'
  ))

# Scatter of female vs. male odds ratios on log10 axes, one panel per
# identification label, with the identity line as reference.
p <- ggplot(wide_f_m, aes(exp(beta_female), exp(beta_male))) +
  geom_point(alpha=0.5) +
  scale_y_log10(limits=c(0.001, 100)) +
  scale_x_log10(limits=c(0.001, 100)) +
  facet_wrap(~identified_in, ncol=2) +
  xlab('Odds Ratios [female]') +
  ylab("Odds Ratio [male]") +
  geom_abline()
p
## Warning: Removed 337 rows containing missing values (geom_point).

# Number of paired variables and correlation between female and male betas
# within each identification label.
wide_f_m %>%
  group_by(identified_in) %>%
  summarize(
    n = n(),
    cr = cor(beta_female, beta_male),
    # Single grouping variable: the result is ungrouped, as before.
    .groups = "drop"
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 3
##   identified_in     n    cr
##   <chr>         <int> <dbl>
## 1 Female           67 0.827
## 2 Male             36 0.752
## 3 Male+Female      34 0.996
## 4 Neither        1288 0.728

Country by country correlation for males

##       25%       50%       75% 
## 0.1029621 0.2047824 0.3064261

Country by country correlation for females

##       25%       50%       75% 
## 0.1486389 0.2627353 0.3701838

# Country correlation of correlations

##                     correlation_females correlation_males
## correlation_females           1.0000000         0.4923484
## correlation_males             0.4923484         1.0000000